This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.33 R6_2.5.1 fastmap_1.1.1 xfun_0.41
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.7 rmarkdown_2.25
## [9] cli_3.6.1 sass_0.4.7 jquerylib_0.1.4 compiler_4.3.2
## [13] rstudioapi_0.15.0 tools_4.3.2 evaluate_0.23 bslib_0.5.1
## [17] yaml_2.3.7 rlang_1.1.2 jsonlite_1.8.7
## Loading required package: ggplot2
## Loading required package: lattice
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Print the current working directory so the report records where it was
# knitted. The relative "data/..." paths used when loading the CSV files
# are resolved against this directory.
getwd()
## [1] "G:/code/git/jjsmu/CaseStudy2DDS"
# The working directory is deliberately left unchanged (no setwd() call);
# the input files are reached through paths relative to it instead.
# List the files and folders in the working directory as a quick check
# that the expected "data" folder is present.
list.files()
## [1] "CaseStudy2.pptx" "CaseStudy2.Rmd" "CaseStudy2_files"
## [4] "CaseStudy2DDS.Rproj" "data" "README.md"
# Load the three case-study data sets from the local "data" folder.
# The paths are relative to the working directory, so no absolute path
# needs to be edited before running this chunk.
#   df_cs2              - read from "CaseStudy2-data.csv"
#   df_cs2_no_attrition - read from "CaseStudy2CompSet No Attrition.csv"
#   df_cs2_no_salary    - read from "CaseStudy2CompSet No Salary.csv"
# header = TRUE: the first row of each file holds the column names.
df_cs2 <- read.csv(
  file.path("data", "CaseStudy2-data.csv"),
  header = TRUE
)
df_cs2_no_attrition <- read.csv(
  file.path("data", "CaseStudy2CompSet No Attrition.csv"),
  header = TRUE
)
df_cs2_no_salary <- read.csv(
  file.path("data", "CaseStudy2CompSet No Salary.csv"),
  header = TRUE
)
## 'data.frame': 870 obs. of 36 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Age : int 32 40 35 32 24 27 41 37 34 34 ...
## $ Attrition : chr "No" "No" "No" "No" ...
## $ BusinessTravel : chr "Travel_Rarely" "Travel_Rarely" "Travel_Frequently" "Travel_Rarely" ...
## $ DailyRate : int 117 1308 200 801 567 294 1283 309 1333 653 ...
## $ Department : chr "Sales" "Research & Development" "Research & Development" "Sales" ...
## $ DistanceFromHome : int 13 14 18 1 2 10 5 10 10 10 ...
## $ Education : int 4 3 2 4 1 2 5 4 4 4 ...
## $ EducationField : chr "Life Sciences" "Medical" "Life Sciences" "Marketing" ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 859 1128 1412 2016 1646 733 1448 1105 1055 1597 ...
## $ EnvironmentSatisfaction : int 2 3 3 3 1 4 2 4 3 4 ...
## $ Gender : chr "Male" "Male" "Male" "Female" ...
## $ HourlyRate : int 73 44 60 48 32 32 90 88 87 92 ...
## $ JobInvolvement : int 3 2 3 3 3 3 4 2 3 2 ...
## $ JobLevel : int 2 5 3 3 1 3 1 2 1 2 ...
## $ JobRole : chr "Sales Executive" "Research Director" "Manufacturing Director" "Sales Executive" ...
## $ JobSatisfaction : int 4 3 4 4 4 1 3 4 3 3 ...
## $ MaritalStatus : chr "Divorced" "Single" "Single" "Married" ...
## $ MonthlyIncome : int 4403 19626 9362 10422 3760 8793 2127 6694 2220 5063 ...
## $ MonthlyRate : int 9250 17544 19944 24032 17218 4809 5561 24223 18410 15332 ...
## $ NumCompaniesWorked : int 2 1 2 1 1 1 2 2 1 1 ...
## $ Over18 : chr "Y" "Y" "Y" "Y" ...
## $ OverTime : chr "No" "No" "No" "No" ...
## $ PercentSalaryHike : int 11 14 11 19 13 21 12 14 19 14 ...
## $ PerformanceRating : int 3 3 3 3 3 4 3 3 3 3 ...
## $ RelationshipSatisfaction: int 3 1 3 3 3 3 1 3 4 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 1 0 0 2 0 2 0 3 1 1 ...
## $ TotalWorkingYears : int 8 21 10 14 6 9 7 8 1 8 ...
## $ TrainingTimesLastYear : int 3 2 2 3 2 4 5 5 2 3 ...
## $ WorkLifeBalance : int 2 4 3 3 3 2 2 3 3 2 ...
## $ YearsAtCompany : int 5 20 2 14 6 9 4 1 1 8 ...
## $ YearsInCurrentRole : int 2 7 2 10 3 7 2 0 1 2 ...
## $ YearsSinceLastPromotion : int 0 4 2 5 1 1 0 0 0 7 ...
## $ YearsWithCurrManager : int 3 9 2 7 3 7 3 0 0 7 ...
## 'data.frame': 300 obs. of 35 variables:
## $ ID : int 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 ...
## $ Age : int 35 33 26 55 29 51 52 39 31 31 ...
## $ BusinessTravel : chr "Travel_Rarely" "Travel_Rarely" "Travel_Rarely" "Travel_Rarely" ...
## $ DailyRate : int 750 147 1330 1311 1246 1456 585 1387 1062 534 ...
## $ Department : chr "Research & Development" "Human Resources" "Research & Development" "Research & Development" ...
## $ DistanceFromHome : int 28 2 21 2 19 1 29 10 24 20 ...
## $ Education : int 3 3 3 3 3 4 4 5 3 3 ...
## $ EducationField : chr "Life Sciences" "Human Resources" "Medical" "Life Sciences" ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1596 1207 1107 505 1497 145 2019 1618 1252 587 ...
## $ EnvironmentSatisfaction : int 2 2 1 3 3 1 1 2 3 1 ...
## $ Gender : chr "Male" "Male" "Male" "Female" ...
## $ HourlyRate : int 46 99 37 97 77 30 40 76 96 66 ...
## $ JobInvolvement : int 4 3 3 3 2 2 3 3 2 3 ...
## $ JobLevel : int 2 1 1 4 2 3 1 2 2 3 ...
## $ JobRole : chr "Laboratory Technician" "Human Resources" "Laboratory Technician" "Manager" ...
## $ JobSatisfaction : int 3 3 3 4 3 1 4 1 1 3 ...
## $ MaritalStatus : chr "Married" "Married" "Divorced" "Single" ...
## $ MonthlyIncome : int 3407 3600 2377 16659 8620 7484 3482 5377 6812 9824 ...
## $ MonthlyRate : int 25348 8429 19373 23258 23757 25796 19788 3835 17198 22908 ...
## $ NumCompaniesWorked : int 1 1 1 2 1 3 2 2 1 3 ...
## $ Over18 : chr "Y" "Y" "Y" "Y" ...
## $ OverTime : chr "No" "No" "No" "Yes" ...
## $ PercentSalaryHike : int 17 13 20 13 14 20 15 13 19 12 ...
## $ PerformanceRating : int 3 3 4 3 3 4 3 3 3 3 ...
## $ RelationshipSatisfaction: int 4 4 3 3 3 3 2 4 2 1 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 2 1 1 0 2 0 2 3 0 0 ...
## $ TotalWorkingYears : int 10 5 1 30 10 23 16 10 10 12 ...
## $ TrainingTimesLastYear : int 3 2 0 2 3 1 3 3 2 2 ...
## $ WorkLifeBalance : int 2 3 2 3 3 2 2 3 3 3 ...
## $ YearsAtCompany : int 10 5 1 5 10 13 9 7 10 1 ...
## $ YearsInCurrentRole : int 9 4 1 4 7 12 8 7 9 0 ...
## $ YearsSinceLastPromotion : int 6 1 0 1 0 12 0 7 1 0 ...
## $ YearsWithCurrManager : int 8 4 0 2 4 8 0 7 8 0 ...
## 'data.frame': 300 obs. of 35 variables:
## $ ID : int 871 872 873 874 875 876 877 878 879 880 ...
## $ Age : int 43 33 55 36 27 39 33 21 30 51 ...
## $ Attrition : chr "No" "No" "Yes" "No" ...
## $ BusinessTravel : chr "Travel_Frequently" "Travel_Rarely" "Travel_Rarely" "Non-Travel" ...
## $ DailyRate : int 1422 461 267 1351 1302 895 750 251 1312 1405 ...
## $ Department : chr "Sales" "Research & Development" "Sales" "Research & Development" ...
## $ DistanceFromHome : int 2 13 13 9 19 5 22 10 23 11 ...
## $ Education : int 4 1 4 4 3 3 2 2 3 2 ...
## $ EducationField : chr "Life Sciences" "Life Sciences" "Marketing" "Life Sciences" ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1849 995 1372 1949 1619 42 160 1279 159 1367 ...
## $ EnvironmentSatisfaction : int 1 2 1 1 4 4 3 1 1 4 ...
## $ Gender : chr "Male" "Female" "Male" "Male" ...
## $ HourlyRate : int 92 53 85 66 67 56 95 45 96 82 ...
## $ JobInvolvement : int 3 3 4 4 2 3 3 2 1 2 ...
## $ JobLevel : int 2 1 4 1 1 2 2 1 1 4 ...
## $ JobRole : chr "Sales Executive" "Research Scientist" "Sales Executive" "Laboratory Technician" ...
## $ JobSatisfaction : int 4 4 3 2 1 4 2 3 3 2 ...
## $ MaritalStatus : chr "Married" "Single" "Single" "Married" ...
## $ MonthlyRate : int 19246 17241 9277 9238 16290 3335 15480 25308 22310 24439 ...
## $ NumCompaniesWorked : int 1 3 6 1 1 3 0 1 1 3 ...
## $ Over18 : chr "Y" "Y" "Y" "Y" ...
## $ OverTime : chr "No" "No" "Yes" "No" ...
## $ PercentSalaryHike : int 20 18 17 22 11 14 13 20 25 16 ...
## $ PerformanceRating : int 4 3 3 4 3 3 3 4 4 3 ...
## $ RelationshipSatisfaction: int 3 1 3 2 1 3 1 3 3 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 1 0 0 0 2 1 1 0 3 0 ...
## $ TotalWorkingYears : int 7 5 24 5 7 19 8 2 10 29 ...
## $ TrainingTimesLastYear : int 5 4 2 3 3 6 2 2 2 1 ...
## $ WorkLifeBalance : int 3 3 2 3 3 4 4 1 2 2 ...
## $ YearsAtCompany : int 7 3 19 5 7 1 7 2 10 5 ...
## $ YearsInCurrentRole : int 7 2 7 4 7 0 7 2 7 2 ...
## $ YearsSinceLastPromotion : int 7 0 3 0 0 0 0 2 0 0 ...
## $ YearsWithCurrManager : int 7 2 8 2 7 0 7 2 9 3 ...
## ID Age Attrition BusinessTravel
## Min. : 1.0 Min. :18.00 Length:870 Length:870
## 1st Qu.:218.2 1st Qu.:30.00 Class :character Class :character
## Median :435.5 Median :35.00 Mode :character Mode :character
## Mean :435.5 Mean :36.83
## 3rd Qu.:652.8 3rd Qu.:43.00
## Max. :870.0 Max. :60.00
## DailyRate Department DistanceFromHome Education
## Min. : 103.0 Length:870 Min. : 1.000 Min. :1.000
## 1st Qu.: 472.5 Class :character 1st Qu.: 2.000 1st Qu.:2.000
## Median : 817.5 Mode :character Median : 7.000 Median :3.000
## Mean : 815.2 Mean : 9.339 Mean :2.901
## 3rd Qu.:1165.8 3rd Qu.:14.000 3rd Qu.:4.000
## Max. :1499.0 Max. :29.000 Max. :5.000
## EducationField EmployeeCount EmployeeNumber EnvironmentSatisfaction
## Length:870 Min. :1 Min. : 1.0 Min. :1.000
## Class :character 1st Qu.:1 1st Qu.: 477.2 1st Qu.:2.000
## Mode :character Median :1 Median :1039.0 Median :3.000
## Mean :1 Mean :1029.8 Mean :2.701
## 3rd Qu.:1 3rd Qu.:1561.5 3rd Qu.:4.000
## Max. :1 Max. :2064.0 Max. :4.000
## Gender HourlyRate JobInvolvement JobLevel
## Length:870 Min. : 30.00 Min. :1.000 Min. :1.000
## Class :character 1st Qu.: 48.00 1st Qu.:2.000 1st Qu.:1.000
## Mode :character Median : 66.00 Median :3.000 Median :2.000
## Mean : 65.61 Mean :2.723 Mean :2.039
## 3rd Qu.: 83.00 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :100.00 Max. :4.000 Max. :5.000
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## Length:870 Min. :1.000 Length:870 Min. : 1081
## Class :character 1st Qu.:2.000 Class :character 1st Qu.: 2840
## Mode :character Median :3.000 Mode :character Median : 4946
## Mean :2.709 Mean : 6390
## 3rd Qu.:4.000 3rd Qu.: 8182
## Max. :4.000 Max. :19999
## MonthlyRate NumCompaniesWorked Over18 OverTime
## Min. : 2094 Min. :0.000 Length:870 Length:870
## 1st Qu.: 8092 1st Qu.:1.000 Class :character Class :character
## Median :14074 Median :2.000 Mode :character Mode :character
## Mean :14326 Mean :2.728
## 3rd Qu.:20456 3rd Qu.:4.000
## Max. :26997 Max. :9.000
## PercentSalaryHike PerformanceRating RelationshipSatisfaction StandardHours
## Min. :11.0 Min. :3.000 Min. :1.000 Min. :80
## 1st Qu.:12.0 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:80
## Median :14.0 Median :3.000 Median :3.000 Median :80
## Mean :15.2 Mean :3.152 Mean :2.707 Mean :80
## 3rd Qu.:18.0 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:80
## Max. :25.0 Max. :4.000 Max. :4.000 Max. :80
## StockOptionLevel TotalWorkingYears TrainingTimesLastYear WorkLifeBalance
## Min. :0.0000 Min. : 0.00 Min. :0.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.: 6.00 1st Qu.:2.000 1st Qu.:2.000
## Median :1.0000 Median :10.00 Median :3.000 Median :3.000
## Mean :0.7839 Mean :11.05 Mean :2.832 Mean :2.782
## 3rd Qu.:1.0000 3rd Qu.:15.00 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :3.0000 Max. :40.00 Max. :6.000 Max. :4.000
## YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 2.000 1st Qu.: 0.000
## Median : 5.000 Median : 3.000 Median : 1.000
## Mean : 6.962 Mean : 4.205 Mean : 2.169
## 3rd Qu.:10.000 3rd Qu.: 7.000 3rd Qu.: 3.000
## Max. :40.000 Max. :18.000 Max. :15.000
## YearsWithCurrManager
## Min. : 0.00
## 1st Qu.: 2.00
## Median : 3.00
## Mean : 4.14
## 3rd Qu.: 7.00
## Max. :17.00
## ID Age BusinessTravel DailyRate
## Min. :1171 Min. :19.00 Length:300 Min. : 102.0
## 1st Qu.:1246 1st Qu.:31.00 Class :character 1st Qu.: 448.0
## Median :1320 Median :36.00 Mode :character Median : 775.0
## Mean :1320 Mean :37.86 Mean : 784.8
## 3rd Qu.:1395 3rd Qu.:44.00 3rd Qu.:1117.0
## Max. :1470 Max. :60.00 Max. :1490.0
## Department DistanceFromHome Education EducationField
## Length:300 Min. : 1.00 Min. :1.000 Length:300
## Class :character 1st Qu.: 2.00 1st Qu.:2.000 Class :character
## Mode :character Median : 7.00 Median :3.000 Mode :character
## Mean : 9.26 Mean :2.973
## 3rd Qu.:14.00 3rd Qu.:4.000
## Max. :29.00 Max. :5.000
## EmployeeCount EmployeeNumber EnvironmentSatisfaction Gender
## Min. :1 Min. : 2.0 Min. :1.000 Length:300
## 1st Qu.:1 1st Qu.: 508.8 1st Qu.:2.000 Class :character
## Median :1 Median : 994.5 Median :3.000 Mode :character
## Mean :1 Mean :1020.9 Mean :2.733
## 3rd Qu.:1 3rd Qu.:1542.5 3rd Qu.:4.000
## Max. :1 Max. :2065.0 Max. :4.000
## HourlyRate JobInvolvement JobLevel JobRole
## Min. : 30.00 Min. :1.000 Min. :1.0 Length:300
## 1st Qu.: 50.00 1st Qu.:2.000 1st Qu.:1.0 Class :character
## Median : 66.00 Median :3.000 Median :2.0 Mode :character
## Mean : 66.07 Mean :2.743 Mean :2.2
## 3rd Qu.: 83.00 3rd Qu.:3.000 3rd Qu.:3.0
## Max. :100.00 Max. :4.000 Max. :5.0
## JobSatisfaction MaritalStatus MonthlyIncome MonthlyRate
## Min. :1.000 Length:300 Min. : 1232 Min. : 2097
## 1st Qu.:2.000 Class :character 1st Qu.: 3034 1st Qu.: 8420
## Median :3.000 Mode :character Median : 5208 Median :15091
## Mean :2.767 Mean : 7103 Mean :14499
## 3rd Qu.:4.000 3rd Qu.: 9750 3rd Qu.:20330
## Max. :4.000 Max. :19973 Max. :26914
## NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. :0.000 Length:300 Length:300 Min. :11.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:12.00
## Median :2.000 Mode :character Mode :character Median :14.00
## Mean :2.547 Mean :15.17
## 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :9.000 Max. :25.00
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## Min. :3.000 Min. :1.000 Min. :80 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.0000
## Median :3.000 Median :3.000 Median :80 Median :1.0000
## Mean :3.153 Mean :2.803 Mean :80 Mean :0.7833
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.0000
## Max. :4.000 Max. :4.000 Max. :80 Max. :3.0000
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.000 Min. :1.000 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 3.000
## Median :10.00 Median :2.000 Median :3.000 Median : 5.000
## Mean :12.44 Mean :2.683 Mean :2.747 Mean : 7.527
## 3rd Qu.:18.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:10.000
## Max. :38.00 Max. :6.000 Max. :4.000 Max. :37.000
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 2.00
## Median : 3.00 Median : 1.00 Median : 3.00
## Mean : 4.33 Mean : 2.29 Mean : 4.38
## 3rd Qu.: 7.00 3rd Qu.: 3.00 3rd Qu.: 7.00
## Max. :18.00 Max. :15.00 Max. :17.00
## ID Age Attrition BusinessTravel
## Min. : 871.0 Min. :18.00 Length:300 Length:300
## 1st Qu.: 945.8 1st Qu.:29.00 Class :character Class :character
## Median :1020.5 Median :36.00 Mode :character Mode :character
## Mean :1020.5 Mean :36.27
## 3rd Qu.:1095.2 3rd Qu.:42.00
## Max. :1170.0 Max. :60.00
## DailyRate Department DistanceFromHome Education
## Min. : 105.0 Length:300 Min. : 1.00 Min. :1.000
## 1st Qu.: 429.2 Class :character 1st Qu.: 2.00 1st Qu.:2.000
## Median : 693.0 Mode :character Median : 7.00 Median :3.000
## Mean : 783.2 Mean : 8.70 Mean :2.887
## 3rd Qu.:1171.2 3rd Qu.:11.25 3rd Qu.:4.000
## Max. :1492.0 Max. :29.00 Max. :5.000
## EducationField EmployeeCount EmployeeNumber EnvironmentSatisfaction
## Length:300 Min. :1 Min. : 7 Min. :1.00
## Class :character 1st Qu.:1 1st Qu.: 477 1st Qu.:2.00
## Mode :character Median :1 Median :1008 Median :3.00
## Mean :1 Mean :1014 Mean :2.77
## 3rd Qu.:1 3rd Qu.:1569 3rd Qu.:4.00
## Max. :1 Max. :2068 Max. :4.00
## Gender HourlyRate JobInvolvement JobLevel
## Length:300 Min. : 30.00 Min. :1.000 Min. :1
## Class :character 1st Qu.: 48.00 1st Qu.:2.000 1st Qu.:1
## Mode :character Median : 66.00 Median :3.000 Median :2
## Mean : 66.52 Mean :2.737 Mean :2
## 3rd Qu.: 85.25 3rd Qu.:3.000 3rd Qu.:2
## Max. :100.00 Max. :4.000 Max. :5
## JobRole JobSatisfaction MaritalStatus MonthlyRate
## Length:300 Min. :1.000 Length:300 Min. : 2122
## Class :character 1st Qu.:2.000 Class :character 1st Qu.: 7778
## Mode :character Median :3.000 Mode :character Median :13508
## Mean :2.747 Mean :14091
## 3rd Qu.:4.000 3rd Qu.:20464
## Max. :4.000 Max. :26999
## NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. :0.00 Length:300 Length:300 Min. :11.00
## 1st Qu.:1.00 Class :character Class :character 1st Qu.:12.75
## Median :2.00 Mode :character Mode :character Median :14.00
## Mean :2.74 Mean :15.28
## 3rd Qu.:4.00 3rd Qu.:18.00
## Max. :9.00 Max. :25.00
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## Min. :3.00 Min. :1.000 Min. :80 Min. :0.0000
## 1st Qu.:3.00 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.0000
## Median :3.00 Median :3.000 Median :80 Median :1.0000
## Mean :3.16 Mean :2.637 Mean :80 Mean :0.8333
## 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.0000
## Max. :4.00 Max. :4.000 Max. :80 Max. :3.0000
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.00 Min. :1.000 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.00 1st Qu.:2.000 1st Qu.: 3.000
## Median : 9.00 Median :3.00 Median :3.000 Median : 5.000
## Mean :10.78 Mean :2.82 Mean :2.717 Mean : 6.623
## 3rd Qu.:14.00 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.: 9.000
## Max. :40.00 Max. :6.00 Max. :4.000 Max. :33.000
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 2.0 1st Qu.: 0.00 1st Qu.: 2.000
## Median : 3.0 Median : 1.00 Median : 3.000
## Mean : 4.2 Mean : 2.14 Mean : 3.817
## 3rd Qu.: 7.0 3rd Qu.: 3.00 3rd Qu.: 7.000
## Max. :16.0 Max. :15.00 Max. :15.000
## ID Age Attrition BusinessTravel DailyRate Department
## 1 1 32 No Travel_Rarely 117 Sales
## 2 2 40 No Travel_Rarely 1308 Research & Development
## 3 3 35 No Travel_Frequently 200 Research & Development
## 4 4 32 No Travel_Rarely 801 Sales
## 5 5 24 No Travel_Frequently 567 Research & Development
## 6 6 27 No Travel_Frequently 294 Research & Development
## 7 7 41 No Travel_Rarely 1283 Research & Development
## 8 8 37 No Travel_Rarely 309 Sales
## 9 9 34 No Travel_Rarely 1333 Sales
## 10 10 34 No Travel_Frequently 653 Research & Development
## 11 11 43 No Travel_Rarely 823 Research & Development
## 12 12 28 No Non-Travel 280 Human Resources
## 13 13 35 No Travel_Rarely 950 Research & Development
## 14 14 30 No Travel_Rarely 202 Sales
## 15 15 46 No Travel_Rarely 991 Human Resources
## 16 16 31 No Non-Travel 1188 Sales
## 17 17 32 No Travel_Rarely 498 Research & Development
## 18 18 46 No Non-Travel 1144 Research & Development
## 19 19 34 No Travel_Rarely 181 Research & Development
## 20 20 44 No Travel_Rarely 170 Research & Development
## 21 21 36 No Travel_Rarely 913 Research & Development
## 22 22 48 No Travel_Rarely 817 Sales
## 23 23 43 No Travel_Frequently 1001 Research & Development
## 24 24 31 No Travel_Frequently 715 Sales
## 25 25 33 No Travel_Rarely 1069 Research & Development
## DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
## 1 13 4 Life Sciences 1 859
## 2 14 3 Medical 1 1128
## 3 18 2 Life Sciences 1 1412
## 4 1 4 Marketing 1 2016
## 5 2 1 Technical Degree 1 1646
## 6 10 2 Life Sciences 1 733
## 7 5 5 Medical 1 1448
## 8 10 4 Life Sciences 1 1105
## 9 10 4 Life Sciences 1 1055
## 10 10 4 Technical Degree 1 1597
## 11 6 3 Medical 1 1866
## 12 1 2 Life Sciences 1 1858
## 13 7 3 Other 1 845
## 14 2 1 Technical Degree 1 508
## 15 1 2 Life Sciences 1 1314
## 16 20 2 Marketing 1 947
## 17 3 4 Medical 1 966
## 18 7 4 Medical 1 487
## 19 2 4 Medical 1 1755
## 20 1 4 Life Sciences 1 1903
## 21 9 2 Medical 1 699
## 22 2 1 Marketing 1 712
## 23 9 5 Medical 1 663
## 24 2 4 Other 1 1613
## 25 1 3 Life Sciences 1 969
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 1 2 Male 73 3 2
## 2 3 Male 44 2 5
## 3 3 Male 60 3 3
## 4 3 Female 48 3 3
## 5 1 Female 32 3 1
## 6 4 Male 32 3 3
## 7 2 Male 90 4 1
## 8 4 Female 88 2 2
## 9 3 Female 87 3 1
## 10 4 Male 92 2 2
## 11 1 Female 81 2 5
## 12 3 Male 43 3 1
## 13 3 Male 59 3 3
## 14 3 Male 72 3 1
## 15 4 Female 44 3 1
## 16 4 Female 45 3 2
## 17 3 Female 93 3 2
## 18 3 Female 30 3 2
## 19 4 Male 97 4 1
## 20 2 Male 78 4 2
## 21 2 Male 48 2 2
## 22 2 Male 56 4 2
## 23 4 Male 72 3 2
## 24 4 Male 54 3 2
## 25 2 Female 42 2 2
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## 1 Sales Executive 4 Divorced 4403
## 2 Research Director 3 Single 19626
## 3 Manufacturing Director 4 Single 9362
## 4 Sales Executive 4 Married 10422
## 5 Research Scientist 4 Single 3760
## 6 Manufacturing Director 1 Divorced 8793
## 7 Research Scientist 3 Married 2127
## 8 Sales Executive 4 Divorced 6694
## 9 Sales Representative 3 Married 2220
## 10 Healthcare Representative 3 Married 5063
## 11 Manager 3 Married 19392
## 12 Human Resources 4 Divorced 2706
## 13 Manufacturing Director 3 Single 10221
## 14 Sales Representative 2 Married 2476
## 15 Human Resources 1 Single 3423
## 16 Sales Executive 3 Married 6932
## 17 Manufacturing Director 1 Married 6725
## 18 Manufacturing Director 3 Married 5258
## 19 Research Scientist 4 Married 2932
## 20 Healthcare Representative 1 Married 5033
## 21 Manufacturing Director 2 Divorced 8847
## 22 Sales Executive 2 Married 8120
## 23 Laboratory Technician 3 Divorced 5679
## 24 Sales Executive 1 Single 5332
## 25 Healthcare Representative 4 Single 6949
## MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike
## 1 9250 2 Y No 11
## 2 17544 1 Y No 14
## 3 19944 2 Y No 11
## 4 24032 1 Y No 19
## 5 17218 1 Y Yes 13
## 6 4809 1 Y No 21
## 7 5561 2 Y Yes 12
## 8 24223 2 Y Yes 14
## 9 18410 1 Y Yes 19
## 10 15332 1 Y No 14
## 11 22539 7 Y No 13
## 12 10494 1 Y No 15
## 13 18869 3 Y No 21
## 14 17434 1 Y No 18
## 15 22957 6 Y No 12
## 16 24406 1 Y No 13
## 17 13554 1 Y No 12
## 18 16044 2 Y No 14
## 19 5586 0 Y Yes 14
## 20 9364 2 Y No 15
## 21 13934 2 Y Yes 11
## 22 18597 3 Y No 12
## 23 19627 3 Y Yes 13
## 24 21602 7 Y No 13
## 25 12291 0 Y No 14
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 1 3 3 80 1
## 2 3 1 80 0
## 3 3 3 80 0
## 4 3 3 80 2
## 5 3 3 80 0
## 6 4 3 80 2
## 7 3 1 80 0
## 8 3 3 80 3
## 9 3 4 80 1
## 10 3 2 80 1
## 11 3 4 80 0
## 12 3 2 80 1
## 13 4 2 80 0
## 14 3 1 80 1
## 15 3 3 80 0
## 16 3 4 80 1
## 17 3 3 80 1
## 18 3 3 80 0
## 19 3 1 80 3
## 20 3 4 80 1
## 21 3 3 80 1
## 22 3 4 80 0
## 23 3 2 80 1
## 24 3 4 80 0
## 25 3 1 80 0
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 1 8 3 2 5
## 2 21 2 4 20
## 3 10 2 3 2
## 4 14 3 3 14
## 5 6 2 3 6
## 6 9 4 2 9
## 7 7 5 2 4
## 8 8 5 3 1
## 9 1 2 3 1
## 10 8 3 2 8
## 11 21 2 3 16
## 12 3 2 3 3
## 13 17 3 4 8
## 14 1 3 3 1
## 15 10 3 4 7
## 16 9 2 2 9
## 17 8 2 4 8
## 18 7 2 4 1
## 19 6 3 3 5
## 20 10 5 3 2
## 21 13 2 3 3
## 22 12 3 3 2
## 23 10 3 3 8
## 24 10 3 3 5
## 25 6 3 3 5
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## 1 2 0 3
## 2 7 4 9
## 3 2 2 2
## 4 10 5 7
## 5 3 1 3
## 6 7 1 7
## 7 2 0 3
## 8 0 0 0
## 9 1 0 0
## 10 2 7 7
## 11 12 6 14
## 12 2 2 2
## 13 5 1 6
## 14 0 0 0
## 15 6 5 7
## 16 8 0 0
## 17 7 6 3
## 18 0 0 0
## 19 0 1 2
## 20 0 2 2
## 21 2 0 2
## 22 2 2 2
## 23 7 4 7
## 24 2 0 3
## 25 0 1 4
## ID Age BusinessTravel DailyRate Department DistanceFromHome
## 1 1171 35 Travel_Rarely 750 Research & Development 28
## 2 1172 33 Travel_Rarely 147 Human Resources 2
## 3 1173 26 Travel_Rarely 1330 Research & Development 21
## 4 1174 55 Travel_Rarely 1311 Research & Development 2
## 5 1175 29 Travel_Rarely 1246 Sales 19
## 6 1176 51 Travel_Frequently 1456 Research & Development 1
## 7 1177 52 Non-Travel 585 Sales 29
## 8 1178 39 Travel_Rarely 1387 Research & Development 10
## 9 1179 31 Travel_Rarely 1062 Research & Development 24
## 10 1180 31 Travel_Frequently 534 Research & Development 20
## 11 1181 40 Travel_Frequently 1469 Research & Development 9
## 12 1182 31 Travel_Rarely 1082 Research & Development 1
## 13 1183 33 Non-Travel 775 Research & Development 4
## 14 1184 50 Travel_Rarely 691 Research & Development 2
## 15 1185 33 Travel_Rarely 516 Research & Development 8
## 16 1186 42 Non-Travel 335 Research & Development 23
## 17 1187 33 Travel_Rarely 575 Research & Development 25
## 18 1188 29 Travel_Rarely 694 Research & Development 1
## 19 1189 55 Travel_Rarely 436 Sales 2
## 20 1190 49 Travel_Rarely 1184 Sales 11
## 21 1191 32 Travel_Rarely 267 Research & Development 29
## 22 1192 50 Travel_Rarely 1126 Research & Development 1
## 23 1193 54 Travel_Rarely 157 Research & Development 10
## 24 1194 24 Travel_Frequently 897 Human Resources 10
## 25 1195 45 Travel_Rarely 1038 Research & Development 20
## Education EducationField EmployeeCount EmployeeNumber
## 1 3 Life Sciences 1 1596
## 2 3 Human Resources 1 1207
## 3 3 Medical 1 1107
## 4 3 Life Sciences 1 505
## 5 3 Life Sciences 1 1497
## 6 4 Medical 1 145
## 7 4 Life Sciences 1 2019
## 8 5 Medical 1 1618
## 9 3 Medical 1 1252
## 10 3 Life Sciences 1 587
## 11 4 Medical 1 964
## 12 4 Medical 1 95
## 13 3 Technical Degree 1 1771
## 14 3 Medical 1 815
## 15 5 Life Sciences 1 1515
## 16 2 Life Sciences 1 1976
## 17 3 Life Sciences 1 1545
## 18 3 Life Sciences 1 1264
## 19 1 Medical 1 842
## 20 3 Marketing 1 840
## 21 4 Life Sciences 1 2010
## 22 2 Medical 1 997
## 23 3 Medical 1 1980
## 24 3 Medical 1 1746
## 25 3 Medical 1 1460
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 1 2 Male 46 4 2
## 2 2 Male 99 3 1
## 3 1 Male 37 3 1
## 4 3 Female 97 3 4
## 5 3 Male 77 2 2
## 6 1 Female 30 2 3
## 7 1 Male 40 3 1
## 8 2 Male 76 3 2
## 9 3 Female 96 2 2
## 10 1 Male 66 3 3
## 11 4 Male 35 3 1
## 12 3 Male 87 3 1
## 13 4 Male 90 3 2
## 14 3 Male 64 3 4
## 15 4 Male 69 3 2
## 16 4 Male 37 2 2
## 17 4 Male 44 2 2
## 18 4 Female 87 2 4
## 19 3 Male 37 3 2
## 20 3 Female 43 3 3
## 21 3 Female 49 2 1
## 22 4 Male 66 3 4
## 23 3 Female 77 3 2
## 24 1 Male 59 3 1
## 25 2 Male 95 1 3
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## 1 Laboratory Technician 3 Married 3407
## 2 Human Resources 3 Married 3600
## 3 Laboratory Technician 3 Divorced 2377
## 4 Manager 4 Single 16659
## 5 Sales Executive 3 Divorced 8620
## 6 Healthcare Representative 1 Single 7484
## 7 Sales Representative 4 Divorced 3482
## 8 Manufacturing Director 1 Married 5377
## 9 Healthcare Representative 1 Single 6812
## 10 Healthcare Representative 3 Married 9824
## 11 Research Scientist 2 Divorced 3617
## 12 Research Scientist 2 Single 2501
## 13 Research Scientist 2 Divorced 3055
## 14 Research Director 3 Married 17639
## 15 Healthcare Representative 3 Single 6388
## 16 Research Scientist 3 Single 4332
## 17 Manufacturing Director 2 Single 4320
## 18 Research Director 4 Divorced 16124
## 19 Sales Executive 4 Single 5160
## 20 Sales Executive 4 Married 7654
## 21 Laboratory Technician 2 Single 2837
## 22 Research Director 4 Divorced 17399
## 23 Manufacturing Director 1 Single 4440
## 24 Human Resources 4 Married 2145
## 25 Healthcare Representative 1 Divorced 10851
## MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike
## 1 25348 1 Y No 17
## 2 8429 1 Y No 13
## 3 19373 1 Y No 20
## 4 23258 2 Y Yes 13
## 5 23757 1 Y No 14
## 6 25796 3 Y No 20
## 7 19788 2 Y No 15
## 8 3835 2 Y No 13
## 9 17198 1 Y No 19
## 10 22908 3 Y No 12
## 11 25063 8 Y Yes 14
## 12 18775 1 Y No 17
## 13 6194 5 Y No 15
## 14 6881 5 Y No 16
## 15 22049 2 Y Yes 17
## 16 14811 1 Y No 12
## 17 24152 1 Y No 13
## 18 3423 3 Y No 14
## 19 21519 4 Y No 16
## 20 5860 1 Y No 18
## 21 15919 1 Y No 13
## 22 6615 9 Y No 22
## 23 25198 6 Y Yes 19
## 24 2097 0 Y No 14
## 25 19863 2 Y Yes 18
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 1 3 4 80 2
## 2 3 4 80 1
## 3 4 3 80 1
## 4 3 3 80 0
## 5 3 3 80 2
## 6 4 3 80 0
## 7 3 2 80 2
## 8 3 4 80 3
## 9 3 2 80 0
## 10 3 1 80 0
## 11 3 4 80 1
## 12 3 2 80 0
## 13 3 4 80 2
## 14 3 4 80 0
## 15 3 1 80 0
## 16 3 4 80 0
## 17 3 4 80 0
## 18 3 2 80 2
## 19 3 3 80 0
## 20 3 1 80 2
## 21 3 3 80 0
## 22 4 3 80 1
## 23 3 4 80 0
## 24 3 4 80 1
## 25 3 2 80 1
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 1 10 3 2 10
## 2 5 2 3 5
## 3 1 0 2 1
## 4 30 2 3 5
## 5 10 3 3 10
## 6 23 1 2 13
## 7 16 3 2 9
## 8 10 3 3 7
## 9 10 2 3 10
## 10 12 2 3 1
## 11 3 2 3 1
## 12 1 4 3 1
## 13 11 2 2 9
## 14 30 3 3 4
## 15 14 6 3 0
## 16 20 2 3 20
## 17 5 2 3 5
## 18 9 2 2 7
## 19 12 3 2 9
## 20 9 3 4 9
## 21 6 3 3 6
## 22 32 1 2 5
## 23 9 3 3 5
## 24 3 2 3 2
## 25 24 2 3 7
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## 1 9 6 8
## 2 4 1 4
## 3 1 0 0
## 4 4 1 2
## 5 7 0 4
## 6 12 12 8
## 7 8 0 0
## 8 7 7 7
## 9 9 1 8
## 10 0 0 0
## 11 1 0 0
## 12 1 1 0
## 13 8 1 7
## 14 3 0 3
## 15 0 0 0
## 16 9 3 7
## 17 3 0 2
## 18 7 1 7
## 19 7 7 3
## 20 8 7 7
## 21 2 4 1
## 22 4 1 3
## 23 2 1 4
## 24 2 2 1
## 25 7 0 7
## ID Age Attrition BusinessTravel DailyRate Department
## 1 871 43 No Travel_Frequently 1422 Sales
## 2 872 33 No Travel_Rarely 461 Research & Development
## 3 873 55 Yes Travel_Rarely 267 Sales
## 4 874 36 No Non-Travel 1351 Research & Development
## 5 875 27 No Travel_Rarely 1302 Research & Development
## 6 876 39 Yes Travel_Rarely 895 Sales
## 7 877 33 No Non-Travel 750 Sales
## 8 878 21 Yes Travel_Frequently 251 Research & Development
## 9 879 30 No Travel_Frequently 1312 Research & Development
## 10 880 51 No Travel_Rarely 1405 Research & Development
## 11 881 46 Yes Travel_Rarely 377 Sales
## 12 882 40 No Travel_Rarely 329 Research & Development
## 13 883 29 No Travel_Rarely 1176 Sales
## 14 884 27 No Travel_Frequently 829 Sales
## 15 885 29 No Travel_Rarely 1210 Sales
## 16 886 22 Yes Travel_Rarely 617 Research & Development
## 17 887 29 No Travel_Rarely 726 Research & Development
## 18 888 34 No Travel_Rarely 167 Research & Development
## 19 889 31 No Travel_Rarely 655 Research & Development
## 20 890 35 No Travel_Rarely 528 Human Resources
## 21 891 40 No Travel_Rarely 523 Research & Development
## 22 892 30 No Travel_Rarely 1288 Sales
## 23 893 46 Yes Travel_Rarely 669 Sales
## 24 894 27 No Travel_Frequently 1410 Sales
## 25 895 49 No Travel_Rarely 1490 Research & Development
## DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
## 1 2 4 Life Sciences 1 1849
## 2 13 1 Life Sciences 1 995
## 3 13 4 Marketing 1 1372
## 4 9 4 Life Sciences 1 1949
## 5 19 3 Other 1 1619
## 6 5 3 Technical Degree 1 42
## 7 22 2 Marketing 1 160
## 8 10 2 Life Sciences 1 1279
## 9 23 3 Life Sciences 1 159
## 10 11 2 Technical Degree 1 1367
## 11 9 3 Marketing 1 1457
## 12 1 4 Life Sciences 1 1361
## 13 3 2 Medical 1 690
## 14 8 1 Marketing 1 800
## 15 2 3 Medical 1 366
## 16 3 1 Life Sciences 1 926
## 17 29 1 Life Sciences 1 1859
## 18 8 5 Life Sciences 1 775
## 19 7 4 Life Sciences 1 76
## 20 8 4 Technical Degree 1 1164
## 21 2 3 Life Sciences 1 1346
## 22 29 4 Technical Degree 1 1568
## 23 9 2 Medical 1 118
## 24 3 1 Medical 1 714
## 25 7 4 Life Sciences 1 1484
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 1 1 Male 92 3 2
## 2 2 Female 53 3 1
## 3 1 Male 85 4 4
## 4 1 Male 66 4 1
## 5 4 Male 67 2 1
## 6 4 Male 56 3 2
## 7 3 Male 95 3 2
## 8 1 Female 45 2 1
## 9 1 Male 96 1 1
## 10 4 Female 82 2 4
## 11 1 Male 52 3 3
## 12 2 Male 88 3 1
## 13 2 Female 62 3 2
## 14 3 Male 84 3 2
## 15 1 Male 78 2 2
## 16 2 Female 34 3 2
## 17 4 Male 93 1 2
## 18 2 Female 32 3 2
## 19 4 Male 48 3 2
## 20 3 Male 100 3 1
## 21 3 Male 98 3 2
## 22 3 Male 33 3 3
## 23 3 Male 64 2 3
## 24 4 Female 71 4 2
## 25 3 Male 35 3 3
## JobRole JobSatisfaction MaritalStatus MonthlyRate
## 1 Sales Executive 4 Married 19246
## 2 Research Scientist 4 Single 17241
## 3 Sales Executive 3 Single 9277
## 4 Laboratory Technician 2 Married 9238
## 5 Laboratory Technician 1 Divorced 16290
## 6 Sales Representative 4 Married 3335
## 7 Sales Executive 2 Married 15480
## 8 Laboratory Technician 3 Single 25308
## 9 Research Scientist 3 Divorced 22310
## 10 Manufacturing Director 2 Single 24439
## 11 Sales Executive 4 Divorced 15986
## 12 Laboratory Technician 2 Married 6762
## 13 Sales Executive 3 Married 3487
## 14 Sales Executive 4 Married 24008
## 15 Sales Executive 2 Married 3687
## 16 Manufacturing Director 3 Married 10022
## 17 Healthcare Representative 3 Divorced 21143
## 18 Manufacturing Director 1 Divorced 4187
## 19 Laboratory Technician 4 Divorced 9528
## 20 Human Resources 3 Single 7108
## 21 Research Scientist 4 Single 22455
## 22 Sales Executive 2 Married 17799
## 23 Sales Executive 4 Single 13596
## 24 Sales Executive 4 Divorced 16673
## 25 Healthcare Representative 2 Divorced 20948
## NumCompaniesWorked Over18 OverTime PercentSalaryHike PerformanceRating
## 1 1 Y No 20 4
## 2 3 Y No 18 3
## 3 6 Y Yes 17 3
## 4 1 Y No 22 4
## 5 1 Y No 11 3
## 6 3 Y No 14 3
## 7 0 Y No 13 3
## 8 1 Y No 20 4
## 9 1 Y No 25 4
## 10 3 Y No 16 3
## 11 4 Y No 11 3
## 12 3 Y No 22 4
## 13 1 Y No 14 3
## 14 0 Y No 19 3
## 15 2 Y No 19 3
## 16 0 Y Yes 19 3
## 17 8 Y No 17 3
## 18 3 Y No 14 3
## 19 3 Y No 22 4
## 20 1 Y No 17 3
## 21 1 Y No 13 3
## 22 3 Y No 12 3
## 23 1 Y No 16 3
## 24 1 Y Yes 20 4
## 25 3 Y No 14 3
## RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
## 1 3 80 1 7
## 2 1 80 0 5
## 3 3 80 0 24
## 4 2 80 0 5
## 5 1 80 2 7
## 6 3 80 1 19
## 7 1 80 1 8
## 8 3 80 0 2
## 9 3 80 3 10
## 10 2 80 0 29
## 11 1 80 1 28
## 12 3 80 1 7
## 13 1 80 1 6
## 14 2 80 1 5
## 15 2 80 2 10
## 16 1 80 1 4
## 17 4 80 2 11
## 18 3 80 1 7
## 19 4 80 1 10
## 20 2 80 0 6
## 21 3 80 0 9
## 22 2 80 1 9
## 23 4 80 0 9
## 24 2 80 2 6
## 25 2 80 2 29
## TrainingTimesLastYear WorkLifeBalance YearsAtCompany YearsInCurrentRole
## 1 5 3 7 7
## 2 4 3 3 2
## 3 2 2 19 7
## 4 3 3 5 4
## 5 3 3 7 7
## 6 6 4 1 0
## 7 2 4 7 7
## 8 2 1 2 2
## 9 2 2 10 7
## 10 1 2 5 2
## 11 1 4 7 7
## 12 3 3 4 2
## 13 5 2 6 0
## 14 3 3 4 2
## 15 2 3 0 0
## 16 3 4 3 2
## 17 3 3 7 0
## 18 3 3 0 0
## 19 3 2 7 7
## 20 2 1 5 4
## 21 4 3 9 8
## 22 3 3 4 2
## 23 3 3 9 8
## 24 3 3 6 5
## 25 3 3 8 7
## YearsSinceLastPromotion YearsWithCurrManager
## 1 7 7
## 2 0 2
## 3 3 8
## 4 0 2
## 5 0 7
## 6 0 0
## 7 0 7
## 8 2 2
## 9 0 9
## 10 0 3
## 11 4 3
## 12 0 3
## 13 1 2
## 14 1 1
## 15 0 0
## 16 0 2
## 17 1 6
## 18 0 0
## 19 1 7
## 20 1 4
## 21 8 8
## 22 1 3
## 23 4 7
## 24 0 4
## 25 0 7
## ID Age Attrition BusinessTravel DailyRate Department
## 846 846 28 No Travel_Rarely 1300 Research & Development
## 847 847 19 No Travel_Rarely 1181 Research & Development
## 848 848 39 No Travel_Rarely 1132 Research & Development
## 849 849 40 Yes Travel_Rarely 575 Sales
## 850 850 26 No Travel_Frequently 1096 Research & Development
## 851 851 31 Yes Travel_Rarely 542 Sales
## 852 852 30 No Travel_Rarely 125 Research & Development
## 853 853 29 No Travel_Rarely 153 Research & Development
## 854 854 28 No Travel_Rarely 1476 Research & Development
## 855 855 35 No Travel_Rarely 219 Research & Development
## 856 856 35 No Travel_Rarely 660 Sales
## 857 857 30 Yes Travel_Rarely 1005 Research & Development
## 858 858 31 No Travel_Frequently 798 Research & Development
## 859 859 41 No Travel_Rarely 933 Research & Development
## 860 860 44 Yes Travel_Rarely 621 Research & Development
## 861 861 51 No Travel_Frequently 968 Research & Development
## 862 862 43 No Travel_Rarely 531 Sales
## 863 863 34 Yes Non-Travel 1362 Sales
## 864 864 47 No Travel_Frequently 217 Sales
## 865 865 45 No Travel_Rarely 1448 Research & Development
## 866 866 48 No Travel_Rarely 855 Research & Development
## 867 867 32 No Non-Travel 976 Sales
## 868 868 47 No Travel_Rarely 571 Sales
## 869 869 45 No Travel_Rarely 1457 Research & Development
## 870 870 35 No Travel_Frequently 138 Research & Development
## DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
## 846 17 2 Medical 1 536
## 847 3 1 Medical 1 201
## 848 1 3 Medical 1 417
## 849 22 2 Marketing 1 492
## 850 6 3 Other 1 1918
## 851 20 3 Life Sciences 1 175
## 852 9 2 Medical 1 41
## 853 15 2 Life Sciences 1 15
## 854 16 2 Medical 1 412
## 855 16 2 Other 1 1886
## 856 7 1 Life Sciences 1 1492
## 857 3 3 Technical Degree 1 297
## 858 7 2 Life Sciences 1 442
## 859 9 4 Life Sciences 1 200
## 860 15 3 Medical 1 1295
## 861 6 2 Medical 1 1297
## 862 4 4 Marketing 1 1293
## 863 19 3 Marketing 1 502
## 864 3 3 Medical 1 746
## 865 29 3 Technical Degree 1 1465
## 866 4 3 Life Sciences 1 1363
## 867 26 4 Marketing 1 333
## 868 14 3 Medical 1 1503
## 869 7 3 Medical 1 1195
## 870 2 3 Medical 1 269
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 846 3 Male 79 3 2
## 847 2 Female 79 3 1
## 848 3 Male 48 4 3
## 849 3 Male 68 2 2
## 850 3 Male 61 4 1
## 851 2 Female 71 1 2
## 852 4 Male 83 2 1
## 853 4 Female 49 2 2
## 854 2 Male 68 4 2
## 855 4 Female 44 2 2
## 856 4 Male 76 3 1
## 857 4 Female 88 3 1
## 858 3 Female 48 2 3
## 859 3 Male 94 3 1
## 860 1 Female 73 3 3
## 861 2 Female 40 2 1
## 862 4 Female 56 2 3
## 863 1 Male 67 4 2
## 864 4 Female 49 3 4
## 865 2 Male 55 3 3
## 866 4 Male 54 3 3
## 867 3 Male 100 3 2
## 868 3 Female 78 3 2
## 869 1 Female 83 3 1
## 870 2 Female 37 3 2
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## 846 Laboratory Technician 1 Divorced 4558
## 847 Laboratory Technician 2 Single 1483
## 848 Healthcare Representative 4 Divorced 9613
## 849 Sales Executive 3 Married 6380
## 850 Laboratory Technician 4 Married 2544
## 851 Sales Executive 3 Married 4559
## 852 Laboratory Technician 3 Single 2206
## 853 Laboratory Technician 3 Single 4193
## 854 Healthcare Representative 1 Single 5661
## 855 Manufacturing Director 2 Married 4788
## 856 Sales Representative 3 Married 2404
## 857 Research Scientist 1 Single 2657
## 858 Manufacturing Director 3 Married 8943
## 859 Laboratory Technician 1 Married 2238
## 860 Healthcare Representative 4 Married 7978
## 861 Laboratory Technician 3 Single 2838
## 862 Sales Executive 4 Single 10231
## 863 Sales Executive 4 Single 5304
## 864 Sales Executive 3 Divorced 13770
## 865 Manufacturing Director 4 Married 9380
## 866 Manufacturing Director 4 Single 7898
## 867 Sales Executive 4 Married 4465
## 868 Sales Executive 3 Married 4591
## 869 Research Scientist 3 Married 4477
## 870 Laboratory Technician 2 Single 4425
## MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike
## 846 13535 1 Y No 12
## 847 16102 1 Y No 14
## 848 10942 0 Y No 17
## 849 6110 2 Y Yes 12
## 850 7102 0 Y No 18
## 851 24788 3 Y Yes 11
## 852 16117 1 Y No 13
## 853 12682 0 Y Yes 12
## 854 4824 0 Y No 19
## 855 25388 0 Y Yes 11
## 856 16192 1 Y No 13
## 857 8556 5 Y Yes 11
## 858 14034 1 Y No 24
## 859 6961 2 Y No 21
## 860 14075 1 Y No 11
## 861 4257 0 Y No 14
## 862 20364 3 Y No 14
## 863 4652 8 Y Yes 13
## 864 10225 9 Y Yes 12
## 865 14720 4 Y Yes 18
## 866 18706 1 Y No 11
## 867 12069 0 Y No 18
## 868 24200 3 Y Yes 17
## 869 20100 4 Y Yes 19
## 870 15986 5 Y No 11
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 846 3 4 80 1
## 847 3 4 80 0
## 848 3 1 80 3
## 849 3 1 80 2
## 850 3 1 80 1
## 851 3 3 80 1
## 852 3 1 80 0
## 853 3 4 80 0
## 854 3 3 80 0
## 855 3 4 80 0
## 856 3 1 80 1
## 857 3 3 80 0
## 858 4 1 80 1
## 859 4 4 80 1
## 860 3 4 80 1
## 861 3 2 80 0
## 862 3 4 80 0
## 863 3 2 80 0
## 864 3 4 80 2
## 865 3 4 80 2
## 866 3 3 80 0
## 867 3 1 80 0
## 868 3 3 80 1
## 869 3 3 80 1
## 870 3 4 80 0
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 846 10 2 3 10
## 847 1 3 3 1
## 848 19 5 2 18
## 849 8 6 3 6
## 850 8 3 3 7
## 851 4 2 3 2
## 852 10 5 3 10
## 853 10 3 3 9
## 854 9 2 3 8
## 855 4 2 3 3
## 856 1 3 3 1
## 857 8 5 3 5
## 858 10 2 3 10
## 859 7 2 3 5
## 860 10 2 3 10
## 861 8 6 2 7
## 862 23 3 4 21
## 863 9 3 2 5
## 864 28 2 2 22
## 865 10 4 4 3
## 866 11 2 3 10
## 867 4 2 3 3
## 868 11 4 2 5
## 869 7 2 2 3
## 870 10 5 3 6
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## 846 0 1 8
## 847 0 0 0
## 848 10 3 7
## 849 4 1 0
## 850 7 7 7
## 851 2 2 2
## 852 0 1 8
## 853 5 0 8
## 854 3 0 7
## 855 2 0 2
## 856 0 0 0
## 857 2 0 4
## 858 9 8 9
## 859 0 1 4
## 860 7 0 5
## 861 0 7 7
## 862 7 15 17
## 863 2 0 4
## 864 2 11 13
## 865 1 1 2
## 866 9 0 8
## 867 2 2 2
## 868 4 1 2
## 869 2 0 2
## 870 2 1 2
## ID Age BusinessTravel DailyRate Department
## 276 1446 29 Travel_Rarely 592 Research & Development
## 277 1447 26 Travel_Frequently 1283 Sales
## 278 1448 43 Travel_Rarely 574 Research & Development
## 279 1449 32 Travel_Rarely 371 Sales
## 280 1450 22 Travel_Rarely 534 Research & Development
## 281 1451 28 Travel_Rarely 1404 Research & Development
## 282 1452 29 Travel_Frequently 806 Research & Development
## 283 1453 54 Travel_Rarely 1082 Sales
## 284 1454 56 Travel_Frequently 1240 Research & Development
## 285 1455 29 Travel_Frequently 115 Sales
## 286 1456 23 Travel_Rarely 1320 Research & Development
## 287 1457 41 Travel_Rarely 1085 Research & Development
## 288 1458 27 Travel_Rarely 1220 Research & Development
## 289 1459 44 Travel_Frequently 920 Research & Development
## 290 1460 22 Travel_Rarely 604 Research & Development
## 291 1461 44 Travel_Rarely 625 Research & Development
## 292 1462 37 Travel_Rarely 571 Research & Development
## 293 1463 30 Travel_Rarely 241 Research & Development
## 294 1464 49 Travel_Frequently 1023 Sales
## 295 1465 26 Travel_Rarely 474 Research & Development
## 296 1466 36 Travel_Rarely 938 Research & Development
## 297 1467 32 Travel_Rarely 1093 Sales
## 298 1468 26 Travel_Rarely 1357 Research & Development
## 299 1469 45 Travel_Rarely 252 Research & Development
## 300 1470 47 Travel_Rarely 249 Sales
## DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
## 276 7 3 Life Sciences 1 1883
## 277 1 3 Medical 1 956
## 278 11 3 Life Sciences 1 1971
## 279 19 3 Life Sciences 1 1739
## 280 15 3 Medical 1 144
## 281 17 3 Technical Degree 1 1960
## 282 1 4 Life Sciences 1 710
## 283 2 4 Life Sciences 1 1070
## 284 9 3 Medical 1 1071
## 285 13 3 Technical Degree 1 1487
## 286 8 1 Medical 1 1684
## 287 2 4 Life Sciences 1 927
## 288 5 3 Life Sciences 1 434
## 289 24 3 Life Sciences 1 392
## 290 6 1 Medical 1 675
## 291 4 3 Medical 1 852
## 292 10 1 Life Sciences 1 802
## 293 7 3 Medical 1 1609
## 294 2 3 Medical 1 2065
## 295 3 3 Life Sciences 1 1581
## 296 2 4 Medical 1 958
## 297 6 4 Medical 1 125
## 298 25 3 Life Sciences 1 55
## 299 1 3 Other 1 336
## 300 2 2 Marketing 1 903
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 276 4 Male 59 3 1
## 277 3 Male 52 2 2
## 278 1 Male 30 3 3
## 279 4 Male 80 1 3
## 280 2 Female 59 3 1
## 281 3 Male 32 2 1
## 282 2 Male 76 1 1
## 283 3 Female 41 2 3
## 284 1 Female 63 3 1
## 285 1 Female 51 3 2
## 286 4 Male 93 2 1
## 287 2 Female 57 1 1
## 288 3 Female 85 3 1
## 289 4 Male 43 3 1
## 290 1 Male 69 3 1
## 291 4 Male 50 3 2
## 292 4 Female 82 3 1
## 293 2 Male 48 2 1
## 294 4 Male 63 2 2
## 295 1 Female 89 3 1
## 296 3 Male 79 3 1
## 297 2 Male 87 3 2
## 298 1 Male 48 1 1
## 299 3 Male 70 4 5
## 300 3 Female 35 3 2
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## 276 Laboratory Technician 1 Single 2062
## 277 Sales Executive 1 Single 4294
## 278 Healthcare Representative 3 Married 7510
## 279 Sales Executive 3 Married 9610
## 280 Laboratory Technician 4 Single 2871
## 281 Laboratory Technician 4 Divorced 2367
## 282 Research Scientist 4 Divorced 2720
## 283 Sales Executive 3 Married 10686
## 284 Research Scientist 3 Married 2942
## 285 Sales Executive 2 Single 5765
## 286 Laboratory Technician 3 Single 3989
## 287 Laboratory Technician 4 Divorced 2778
## 288 Research Scientist 2 Single 2478
## 289 Laboratory Technician 3 Divorced 3161
## 290 Research Scientist 3 Married 2773
## 291 Healthcare Representative 2 Single 5933
## 292 Research Scientist 1 Divorced 2782
## 293 Research Scientist 2 Married 2141
## 294 Sales Executive 2 Married 5390
## 295 Research Scientist 4 Married 2061
## 296 Laboratory Technician 3 Single 2519
## 297 Sales Executive 3 Single 5010
## 298 Laboratory Technician 3 Single 2293
## 299 Manager 4 Married 19202
## 300 Sales Executive 4 Married 4537
## MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike
## 276 19384 3 Y No 14
## 277 11148 1 Y No 12
## 278 16873 1 Y No 17
## 279 3840 3 Y No 13
## 280 23785 1 Y No 15
## 281 18779 5 Y No 12
## 282 18959 1 Y No 18
## 283 8392 6 Y No 11
## 284 12154 2 Y No 19
## 285 17485 5 Y No 11
## 286 20586 1 Y Yes 11
## 287 17725 4 Y Yes 13
## 288 20938 1 Y Yes 12
## 289 19920 3 Y Yes 22
## 290 12145 0 Y No 20
## 291 5197 9 Y No 12
## 292 19905 0 Y Yes 13
## 293 5348 1 Y No 12
## 294 13243 2 Y No 14
## 295 11133 1 Y No 21
## 296 12287 4 Y No 21
## 297 24301 1 Y No 16
## 298 10558 1 Y No 12
## 299 15970 0 Y No 11
## 300 17783 0 Y Yes 22
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 276 3 2 80 0
## 277 3 2 80 0
## 278 3 2 80 1
## 279 3 3 80 1
## 280 3 3 80 0
## 281 3 1 80 1
## 282 3 4 80 1
## 283 3 2 80 1
## 284 3 2 80 1
## 285 3 1 80 0
## 286 3 1 80 0
## 287 3 3 80 1
## 288 3 2 80 0
## 289 4 4 80 1
## 290 4 4 80 0
## 291 3 4 80 0
## 292 3 2 80 2
## 293 3 2 80 1
## 294 3 4 80 0
## 295 4 1 80 0
## 296 4 3 80 0
## 297 3 1 80 0
## 298 3 3 80 0
## 299 3 3 80 1
## 300 4 1 80 1
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 276 11 2 3 3
## 277 7 2 3 7
## 278 10 1 3 10
## 279 10 2 1 4
## 280 1 5 3 0
## 281 6 2 2 4
## 282 10 5 3 10
## 283 13 4 3 9
## 284 18 4 3 5
## 285 7 4 1 5
## 286 5 2 3 5
## 287 10 1 2 7
## 288 4 2 2 4
## 289 19 0 1 1
## 290 3 3 3 2
## 291 10 2 2 5
## 292 6 3 2 5
## 293 6 3 2 6
## 294 17 3 2 9
## 295 1 5 3 1
## 296 16 6 3 11
## 297 12 0 3 11
## 298 1 2 2 1
## 299 25 2 3 24
## 300 8 2 3 7
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## 276 2 1 2
## 277 7 0 7
## 278 9 0 9
## 279 3 0 2
## 280 0 0 0
## 281 1 0 3
## 282 7 2 8
## 283 4 7 0
## 284 4 0 3
## 285 3 0 0
## 286 4 1 2
## 287 7 1 0
## 288 3 1 2
## 289 0 0 0
## 290 2 2 2
## 291 2 2 3
## 292 3 4 3
## 293 4 1 1
## 294 6 0 8
## 295 0 0 0
## 296 8 3 9
## 297 8 5 7
## 298 0 0 1
## 299 0 1 7
## 300 6 7 7
## ID Age Attrition BusinessTravel DailyRate Department
## 276 1146 45 No Travel_Rarely 374 Sales
## 277 1147 34 No Travel_Rarely 628 Research & Development
## 278 1148 31 No Travel_Rarely 471 Research & Development
## 279 1149 29 No Travel_Rarely 1010 Research & Development
## 280 1150 30 No Travel_Rarely 288 Research & Development
## 281 1151 38 No Travel_Frequently 693 Research & Development
## 282 1152 27 No Travel_Frequently 793 Sales
## 283 1153 54 No Travel_Frequently 928 Research & Development
## 284 1154 28 Yes Travel_Rarely 1485 Research & Development
## 285 1155 49 No Travel_Rarely 470 Research & Development
## 286 1156 51 No Travel_Rarely 313 Research & Development
## 287 1157 29 Yes Travel_Rarely 341 Sales
## 288 1158 36 No Travel_Rarely 430 Research & Development
## 289 1159 27 No Travel_Rarely 199 Research & Development
## 290 1160 36 No Travel_Rarely 216 Research & Development
## 291 1161 44 Yes Travel_Rarely 1097 Research & Development
## 292 1162 38 No Travel_Rarely 168 Research & Development
## 293 1163 29 No Travel_Frequently 574 Research & Development
## 294 1164 37 No Travel_Rarely 799 Research & Development
## 295 1165 26 Yes Travel_Frequently 887 Research & Development
## 296 1166 32 No Travel_Frequently 116 Research & Development
## 297 1167 40 No Travel_Rarely 1492 Research & Development
## 298 1168 30 No Travel_Rarely 1082 Sales
## 299 1169 27 No Travel_Rarely 798 Research & Development
## 300 1170 37 No Travel_Rarely 290 Research & Development
## DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
## 276 20 3 Life Sciences 1 2046
## 277 8 3 Medical 1 2068
## 278 4 3 Medical 1 1916
## 279 1 3 Life Sciences 1 1249
## 280 2 3 Life Sciences 1 117
## 281 7 3 Life Sciences 1 1382
## 282 2 1 Life Sciences 1 1371
## 283 20 4 Life Sciences 1 450
## 284 12 1 Life Sciences 1 1175
## 285 20 4 Medical 1 170
## 286 3 3 Medical 1 258
## 287 1 3 Medical 1 896
## 288 2 4 Other 1 1847
## 289 6 3 Life Sciences 1 1162
## 290 6 2 Medical 1 178
## 291 10 4 Life Sciences 1 1200
## 292 1 3 Life Sciences 1 743
## 293 20 1 Medical 1 1852
## 294 1 3 Technical Degree 1 623
## 295 5 2 Medical 1 848
## 296 13 3 Other 1 1234
## 297 20 4 Technical Degree 1 1092
## 298 12 3 Technical Degree 1 533
## 299 6 4 Medical 1 655
## 300 21 3 Life Sciences 1 267
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel
## 276 4 Female 50 3 2
## 277 2 Male 82 4 2
## 278 1 Female 62 4 1
## 279 1 Female 97 3 1
## 280 3 Male 99 2 2
## 281 4 Male 57 4 1
## 282 4 Male 43 1 2
## 283 4 Female 31 3 2
## 284 3 Female 79 3 1
## 285 3 Female 96 3 2
## 286 4 Female 98 3 4
## 287 2 Female 48 2 1
## 288 4 Female 73 3 2
## 289 4 Male 55 2 1
## 290 2 Male 84 3 2
## 291 3 Male 96 3 1
## 292 3 Female 81 3 3
## 293 4 Male 40 3 1
## 294 2 Female 59 3 3
## 295 3 Female 88 2 1
## 296 3 Female 77 2 1
## 297 1 Male 61 3 3
## 298 2 Female 83 3 2
## 299 1 Female 66 2 1
## 300 2 Male 65 4 1
## JobRole JobSatisfaction MaritalStatus MonthlyRate
## 276 Sales Executive 3 Single 23333
## 277 Laboratory Technician 3 Married 10228
## 278 Laboratory Technician 3 Divorced 16031
## 279 Research Scientist 4 Divorced 5598
## 280 Healthcare Representative 4 Married 15830
## 281 Research Scientist 3 Divorced 15748
## 282 Sales Executive 4 Single 20392
## 283 Research Scientist 3 Single 16885
## 284 Laboratory Technician 4 Married 22955
## 285 Manufacturing Director 1 Married 5549
## 286 Healthcare Representative 2 Single 7192
## 287 Sales Representative 3 Divorced 23522
## 288 Research Scientist 2 Married 19573
## 289 Research Scientist 3 Married 7950
## 290 Manufacturing Director 2 Divorced 2819
## 291 Research Scientist 3 Single 10826
## 292 Manufacturing Director 3 Single 15397
## 293 Laboratory Technician 4 Married 7003
## 294 Manufacturing Director 4 Single 23848
## 295 Research Scientist 3 Married 20898
## 296 Laboratory Technician 2 Married 7331
## 297 Healthcare Representative 4 Married 26542
## 298 Sales Executive 3 Single 19558
## 299 Research Scientist 3 Divorced 5013
## 300 Research Scientist 1 Married 22977
## NumCompaniesWorked Over18 OverTime PercentSalaryHike PerformanceRating
## 276 8 Y No 15 3
## 277 2 Y No 12 3
## 278 8 Y No 12 3
## 279 1 Y No 15 3
## 280 1 Y No 19 3
## 281 1 Y No 11 3
## 282 3 Y No 20 4
## 283 3 Y No 12 3
## 284 1 Y Yes 11 3
## 285 1 Y No 14 3
## 286 3 Y No 18 3
## 287 6 Y Yes 19 3
## 288 4 Y Yes 22 4
## 289 1 Y No 13 3
## 290 6 Y No 20 4
## 291 1 Y Yes 11 3
## 292 4 Y Yes 14 3
## 293 1 Y No 13 3
## 294 4 Y No 17 3
## 295 1 Y Yes 14 3
## 296 1 Y No 20 4
## 297 4 Y No 20 4
## 298 0 Y No 11 3
## 299 0 Y No 12 3
## 300 1 Y Yes 12 3
## RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
## 276 3 80 0 8
## 277 1 80 0 6
## 278 2 80 1 4
## 279 1 80 3 3
## 280 1 80 3 11
## 281 4 80 3 4
## 282 2 80 0 8
## 283 4 80 0 20
## 284 4 80 0 1
## 285 3 80 0 16
## 286 3 80 0 21
## 287 3 80 3 5
## 288 4 80 1 15
## 289 3 80 1 4
## 290 4 80 2 7
## 291 3 80 0 6
## 292 4 80 0 10
## 293 2 80 0 11
## 294 4 80 0 12
## 295 1 80 1 8
## 296 3 80 1 2
## 297 4 80 1 14
## 298 2 80 0 6
## 299 3 80 2 6
## 300 1 80 1 8
## TrainingTimesLastYear WorkLifeBalance YearsAtCompany YearsInCurrentRole
## 276 3 3 5 3
## 277 3 4 4 3
## 278 0 2 2 2
## 279 5 3 3 2
## 280 3 3 11 10
## 281 2 3 4 2
## 282 3 3 6 2
## 283 4 2 4 3
## 284 4 2 1 1
## 285 2 2 15 11
## 286 6 3 7 7
## 287 3 3 3 2
## 288 2 3 1 0
## 289 0 3 4 2
## 290 0 3 3 2
## 291 4 3 6 4
## 292 4 4 1 0
## 293 3 4 11 8
## 294 3 4 6 5
## 295 2 3 8 7
## 296 2 3 2 2
## 297 6 3 11 10
## 298 6 3 5 4
## 299 5 2 5 3
## 300 3 2 8 7
## YearsSinceLastPromotion YearsWithCurrManager
## 276 0 1
## 277 1 2
## 278 2 2
## 279 1 2
## 280 10 8
## 281 0 3
## 282 0 0
## 283 0 3
## 284 0 0
## 285 5 11
## 286 1 0
## 287 0 2
## 288 0 0
## 289 2 2
## 290 0 1
## 291 0 2
## 292 0 0
## 293 3 10
## 294 1 2
## 295 1 7
## 296 2 2
## 297 11 1
## 298 4 4
## 299 0 3
## 300 1 7
## [1] 870 36
## [1] 300 35
## [1] 300 35
## ID Age Attrition
## 0 0 0
## BusinessTravel DailyRate Department
## 0 0 0
## DistanceFromHome Education EducationField
## 0 0 0
## EmployeeCount EmployeeNumber EnvironmentSatisfaction
## 0 0 0
## Gender HourlyRate JobInvolvement
## 0 0 0
## JobLevel JobRole JobSatisfaction
## 0 0 0
## MaritalStatus MonthlyIncome MonthlyRate
## 0 0 0
## NumCompaniesWorked Over18 OverTime
## 0 0 0
## PercentSalaryHike PerformanceRating RelationshipSatisfaction
## 0 0 0
## StandardHours StockOptionLevel TotalWorkingYears
## 0 0 0
## TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 0 0 0
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## 0 0 0
## ID Age BusinessTravel
## 0 0 0
## DailyRate Department DistanceFromHome
## 0 0 0
## Education EducationField EmployeeCount
## 0 0 0
## EmployeeNumber EnvironmentSatisfaction Gender
## 0 0 0
## HourlyRate JobInvolvement JobLevel
## 0 0 0
## JobRole JobSatisfaction MaritalStatus
## 0 0 0
## MonthlyIncome MonthlyRate NumCompaniesWorked
## 0 0 0
## Over18 OverTime PercentSalaryHike
## 0 0 0
## PerformanceRating RelationshipSatisfaction StandardHours
## 0 0 0
## StockOptionLevel TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## WorkLifeBalance YearsAtCompany YearsInCurrentRole
## 0 0 0
## YearsSinceLastPromotion YearsWithCurrManager
## 0 0
## ID Age Attrition
## 0 0 0
## BusinessTravel DailyRate Department
## 0 0 0
## DistanceFromHome Education EducationField
## 0 0 0
## EmployeeCount EmployeeNumber EnvironmentSatisfaction
## 0 0 0
## Gender HourlyRate JobInvolvement
## 0 0 0
## JobLevel JobRole JobSatisfaction
## 0 0 0
## MaritalStatus MonthlyRate NumCompaniesWorked
## 0 0 0
## Over18 OverTime PercentSalaryHike
## 0 0 0
## PerformanceRating RelationshipSatisfaction StandardHours
## 0 0 0
## StockOptionLevel TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## WorkLifeBalance YearsAtCompany YearsInCurrentRole
## 0 0 0
## YearsSinceLastPromotion YearsWithCurrManager
## 0 0
# Minimal four-column view: ID, job role, job satisfaction, monthly income
df <- select(df_cs2, ID, JobRole, JobSatisfaction, MonthlyIncome)

# Job-related columns for every row of df_cs2. Attrition is kept as a column;
# no row filter on Attrition is applied here.
df_job <- select(
  df_cs2,
  ID, Attrition, JobInvolvement, JobLevel, JobRole, JobSatisfaction
)

# Pay-related columns for every row of df_cs2 (no Attrition filter applied)
df_pay <- select(
  df_cs2,
  ID, DailyRate, HourlyRate, MonthlyRate, MonthlyIncome, OverTime,
  PercentSalaryHike, StockOptionLevel, StandardHours
)

# Basic employee columns for every row of df_cs2 (no Attrition filter applied)
df_emp <- select(
  df_cs2,
  ID, Age, Gender, Education, EducationField, Department, DistanceFromHome,
  MaritalStatus, NumCompaniesWorked, PerformanceRating, TotalWorkingYears,
  WorkLifeBalance, YearsAtCompany, YearsInCurrentRole, YearsWithCurrManager,
  YearsSinceLastPromotion
)
# Count the number of employees with attrition, we will compare with rest of sample later
sum(df_cs2$Attrition == "Yes")
## [1] 140
### Company Profile Snapshot: Age, Gender, Education, Pay, Role ###
# Titles report the sample size with nrow() (a plain integer) instead of
# pasting the one-cell data frame returned by count(). df, df_emp, df_job and
# df_pay are unfiltered column subsets of df_cs2, so they share one row count.

# Box plot age by department
box_plot_age_dept <- df_emp %>%
  ggplot(aes(Department, Age, fill = Department)) +
  geom_boxplot() +
  ggtitle(paste("Box plot of Age by Department, n = ", nrow(df_emp)))
ggplotly(box_plot_age_dept)

# Box plot years in current role by department
box_plot_yirc_dept <- df_emp %>%
  ggplot(aes(Department, YearsInCurrentRole, fill = Department)) +
  geom_boxplot() +
  ggtitle(paste(
    "Box plot of Years In Current Role by Department, n = ", nrow(df_emp)
  ))
ggplotly(box_plot_yirc_dept)

# Box plot age by job role (df_emp and df_job are joined on their shared ID)
box_plot_age_role <- merge(df_emp, df_job) %>%
  ggplot(aes(JobRole, Age, fill = JobRole)) +
  geom_boxplot() +
  ggtitle(paste("Box plot of Age by Job Role, n = ", nrow(df_emp))) +
  theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 1))
ggplotly(box_plot_age_role)

# Box plot years in current role by job role
box_plot_role_yicr <- merge(df_emp, df_job) %>%
  ggplot(aes(JobRole, YearsInCurrentRole, fill = JobRole)) +
  geom_boxplot() +
  ggtitle(paste(
    "Box plot of Years in current role by Job Role, n = ", nrow(df_emp)
  )) +
  xlab("Job Role") +
  ylab("Years in current role") +
  theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 0.5))
ggplotly(box_plot_role_yicr)

# Bar plot job role by gender
bar_plot_dodge_role_gender <- merge(df_job, df_emp) %>%
  ggplot(aes(JobRole, fill = Gender)) +
  geom_bar(position = "dodge") +
  ggtitle(paste("Bar plot of Job Roles by Gender, n = ", nrow(df))) +
  xlab("Job Role") +
  ylab("Count") +
  theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 0.5))
ggplotly(bar_plot_dodge_role_gender)

# Box plot of monthly income by job role
box_plot_income_role <- df %>%
  ggplot(aes(JobRole, MonthlyIncome, fill = JobRole)) +
  geom_boxplot() +
  ggtitle(paste("Box plot of Monthly Income by Job Role, n = ", nrow(df))) +
  xlab("Job Role") +
  ylab("Monthly Income (USD)") +
  theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 0.5))
ggplotly(box_plot_income_role)

# Box plot hourly rate by education (rendered as a static ggplot, not plotly)
box_plot_hourly_edu <- merge(df_emp, df_pay) %>%
  ggplot(aes(Education, HourlyRate, fill = EducationField)) +
  geom_boxplot() +
  ggtitle(paste(
    "Box plot of Education v. Hourly Rate by Education Field, n = ",
    nrow(df_emp)
  )) +
  xlab("Education") +
  ylab("Hourly Rate (USD/Hour)")
box_plot_hourly_edu

# Bar plot of job satisfaction by job role
bar_plot_dodge_job_sat <- df %>%
  ggplot(aes(JobSatisfaction, fill = JobRole)) +
  geom_bar(position = "dodge") +
  ggtitle(paste("Bar plot of Job Satisfaction by Job Roles, n = ", nrow(df))) +
  xlab("Job Satisfaction") +
  ylab("Count") +
  theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 1))
ggplotly(bar_plot_dodge_job_sat)

# Histogram of monthly income, one facet per job role
hist_wrap_income_role <- df %>%
  ggplot(aes(MonthlyIncome, fill = JobRole)) +
  geom_histogram(binwidth = 200) +
  ggtitle(paste("Histogram of Monthly Income by Job Role, n = ", nrow(df))) +
  xlab("Monthly Income (USD)") +
  ylab("Count") +
  theme(axis.text.x = element_text(angle = 15, vjust = 0, hjust = 0.1)) +
  facet_wrap(~JobRole)
ggplotly(hist_wrap_income_role)

# Histogram of monthly income, one facet per job satisfaction level.
# `position` is a layer setting, not an aesthetic, so it is passed to
# geom_histogram() rather than aes().
hist_wrap_income_sat <- df %>%
  ggplot(aes(MonthlyIncome, fill = JobRole)) +
  geom_histogram(binwidth = 200, position = "stack") +
  ggtitle(paste(
    "Histogram of Monthly Income by Job Satisfaction, n = ", nrow(df)
  )) +
  xlab("Monthly Income (USD)") +
  ylab("Count") +
  facet_wrap(~JobSatisfaction)
ggplotly(hist_wrap_income_sat)
### Attritional Factors by Variables ###
# Stacked bar chart: head count per job role, split by attrition status
hist_att_role <- ggplot(
  merge(df_emp, df_job),
  aes(JobRole, fill = Attrition)
) +
  geom_bar(position = "stack") +
  labs(
    title = paste("Bar plot of Attrition by Job Role, n = ", nrow(df_emp)),
    x = "Job Role",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 15, vjust = 0, hjust = 1))
ggplotly(hist_att_role)

# Dodged bar chart: attrition status by gender, one facet per job role
hist_wrap_role_gender <- ggplot(
  merge(df_emp, df_job),
  aes(Attrition, fill = Gender)
) +
  geom_bar(position = "dodge") +
  labs(
    title = paste("Bar plot of Attrition by Gender, n = ", nrow(df_emp)),
    x = "Attrition",
    y = "Count"
  ) +
  facet_wrap(vars(JobRole))
ggplotly(hist_wrap_role_gender)

# Smoothed performance rating against distance from home, faceted by job role
smooth_wrap_dist_rating <- ggplot(
  merge(df_emp, df_job),
  aes(DistanceFromHome, PerformanceRating, color = JobRole)
) +
  geom_smooth() +
  facet_wrap(vars(JobRole))
ggplotly(smooth_wrap_dist_rating)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Smoothed performance rating against number of companies worked, faceted by
# job role
smooth_wrap_numcos_rating <- ggplot(
  merge(df_emp, df_job),
  aes(NumCompaniesWorked, PerformanceRating, color = JobRole)
) +
  geom_smooth() +
  facet_wrap(vars(JobRole))
ggplotly(smooth_wrap_numcos_rating)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.045
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 3.045
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.1517e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.045
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 3.045
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.1517e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.035
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.035
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1
# Smoothed performance rating against years since last promotion, faceted by
# job role
smooth_wrap_promo_rating <- ggplot(
  merge(df_emp, df_job),
  aes(YearsSinceLastPromotion, PerformanceRating, color = JobRole)
) +
  geom_smooth() +
  facet_wrap(vars(JobRole))
ggplotly(smooth_wrap_promo_rating)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.025
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.025
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.2913e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.025
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.025
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.2913e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.075
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.075
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.285e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.075
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.075
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 2.285e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.055
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.055
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 4.4805e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.055
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.055
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 4.4805e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.035
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.035
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4
# All four plots below are jittered scatter plots coloured by attrition and
# faceted by job role.

# Percent salary hike v. years since last promotion
plot_wrap_psh_yslp <- ggplot(
  df_cs2,
  aes(YearsSinceLastPromotion, PercentSalaryHike)
) +
  geom_point(aes(color = Attrition), size = 0.2, position = "jitter") +
  facet_wrap(vars(JobRole))
ggplotly(plot_wrap_psh_yslp)

# Years in current role v. years since last promotion
plot_wrap_yslp_yicr <- ggplot(
  df_cs2,
  aes(YearsSinceLastPromotion, YearsInCurrentRole)
) +
  geom_point(aes(color = Attrition), size = 0.2, position = "jitter") +
  facet_wrap(vars(JobRole))
ggplotly(plot_wrap_yslp_yicr)

# *** Years in current role v. years with current manager ***
plot_wrap_ywcm_yicr <- ggplot(
  df_cs2,
  aes(YearsWithCurrManager, YearsInCurrentRole)
) +
  geom_point(aes(color = Attrition), size = 0.2, position = "jitter") +
  facet_wrap(vars(JobRole))
ggplotly(plot_wrap_ywcm_yicr)

# Job satisfaction v. years with current manager
plot_wrap_ywcm_js <- ggplot(
  df_cs2,
  aes(YearsWithCurrManager, JobSatisfaction)
) +
  geom_point(aes(color = Attrition), size = 0.2, position = "jitter") +
  facet_wrap(vars(JobRole))
ggplotly(plot_wrap_ywcm_js)
#source("analysis/knn.R")
# KNN input for attrition v. leadership: years with the current manager and
# years in the current role. Rows are split 70/30 into train and test with a
# fixed seed.
set.seed(6)
split_percent <- 0.7
l_train <- select(df_cs2, Attrition, YearsWithCurrManager, YearsInCurrentRole)
trainIndices <- sample(
  seq_len(nrow(l_train)),
  round(split_percent * nrow(l_train))
)
train <- l_train[trainIndices, ]
test <- l_train[-trainIndices, ]

# Jittered scatter of the two predictors with one smoother per attrition class
scatter_smooth_ywcm_yicr <- ggplot(
  l_train,
  aes(YearsWithCurrManager, YearsInCurrentRole, color = Attrition)
) +
  geom_point(position = "jitter") +
  geom_smooth(aes(color = Attrition)) +
  labs(title = paste(
    "Plot of Years with current manager v. Years in current role, n = ",
    nrow(l_train)
  ))
ggplotly(scatter_smooth_ywcm_yicr)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Classify each test row from its 5 nearest training rows (columns 2:3 are the
# two predictors), then cross-tabulate predictions against the true labels
classification <- knn(
  train = train[, 2:3],
  test = test[, 2:3],
  cl = train$Attrition,
  k = 5,
  prob = TRUE
)
table(classification, test$Attrition)
##
## classification No Yes
## No 216 43
## Yes 2 0
# Accuracy, kappa and sensitivity/specificity for the KNN predictions above.
# No `positive` argument is passed, and the printed report labels "No" as the
# positive class, so Specificity is the rate at which attrition ("Yes") is
# detected.
confusionMatrix(table(classification, test$Attrition))
## Confusion Matrix and Statistics
##
##
## classification No Yes
## No 216 43
## Yes 2 0
##
## Accuracy : 0.8276
## 95% CI : (0.7762, 0.8714)
## No Information Rate : 0.8352
## P-Value [Acc > NIR] : 0.6673
##
## Kappa : -0.0149
##
## Mcnemar's Test P-Value : 2.479e-09
##
## Sensitivity : 0.9908
## Specificity : 0.0000
## Pos Pred Value : 0.8340
## Neg Pred Value : 0.0000
## Prevalence : 0.8352
## Detection Rate : 0.8276
## Detection Prevalence : 0.9923
## Balanced Accuracy : 0.4954
##
## 'Positive' Class : No
##
# KNN input for attrition v. time duration: years at the company and years in
# the current role. Rows are split 70/30 into train and test with a fixed seed.
set.seed(6)
split_percent <- 0.7
l_train <- select(df_cs2, Attrition, YearsAtCompany, YearsInCurrentRole)
trainIndices <- sample(
  seq_len(nrow(l_train)),
  round(split_percent * nrow(l_train))
)
train <- l_train[trainIndices, ]
test <- l_train[-trainIndices, ]

# Jittered scatter of the two predictors with one smoother per attrition class
scatter_smooth_yac_yicr <- ggplot(
  l_train,
  aes(YearsAtCompany, YearsInCurrentRole, color = Attrition)
) +
  geom_point(position = "jitter") +
  geom_smooth(aes(color = Attrition)) +
  labs(title = paste(
    "Plot of Years at company v. years in current role, n = ",
    nrow(l_train)
  ))
ggplotly(scatter_smooth_yac_yicr)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# 5-nearest-neighbour prediction on the held-out rows (predictors are columns
# 2:3), followed by the prediction-by-truth contingency table
classification <- knn(
  train = train[, 2:3],
  test = test[, 2:3],
  cl = train$Attrition,
  k = 5,
  prob = TRUE
)
table(classification, test$Attrition)
##
## classification No Yes
## No 218 43
## Yes 0 0
# Accuracy, kappa and sensitivity/specificity for the KNN predictions above.
# No `positive` argument is passed, and the printed report labels "No" as the
# positive class, so Specificity is the rate at which attrition ("Yes") is
# detected.
confusionMatrix(table(classification, test$Attrition))
## Confusion Matrix and Statistics
##
##
## classification No Yes
## No 218 43
## Yes 0 0
##
## Accuracy : 0.8352
## 95% CI : (0.7846, 0.8781)
## No Information Rate : 0.8352
## P-Value [Acc > NIR] : 0.5406
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 1.504e-10
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.8352
## Neg Pred Value : NaN
## Prevalence : 0.8352
## Detection Rate : 0.8352
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : No
##
# Classification - attrition by peer relationship
# Predictors: relationship satisfaction and years with the current manager.
# Rows are split 70/30 into train and test with a fixed seed.
set.seed(6)
split_percent <- 0.7
l_train <- df_cs2 %>%
  select(Attrition, RelationshipSatisfaction, YearsWithCurrManager)
trainIndices <- sample(
  seq_len(nrow(l_train)),
  round(split_percent * nrow(l_train))
)
train <- l_train[trainIndices, ]
test <- l_train[-trainIndices, ]

# Jittered scatter with one smoother per attrition class. The title now names
# the two variables that are actually plotted. The object name is kept as-is
# (it reuses the name of the leadership plot) so any later reference still
# resolves.
scatter_smooth_ywcm_yicr <- l_train %>%
  ggplot(aes(RelationshipSatisfaction, YearsWithCurrManager, color = Attrition)) +
  geom_point(position = "jitter") +
  geom_smooth(aes(color = Attrition)) +
  ggtitle(paste(
    "Plot of Relationship satisfaction v. Years with current manager, n = ",
    nrow(l_train)
  ))
ggplotly(scatter_smooth_ywcm_yicr)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.6883e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 4.015
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 2.6883e-15
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.5079e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 4.015
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 2.5079e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1
# Predict attrition for the test rows from their 5 nearest training rows
# (columns 2:3 hold the predictors) and tabulate predictions against truth
classification <- knn(
  train = train[, 2:3],
  test = test[, 2:3],
  cl = train$Attrition,
  k = 5,
  prob = TRUE
)
table(classification, test$Attrition)
##
## classification No Yes
## No 218 43
## Yes 0 0
# Accuracy, kappa and sensitivity/specificity for the KNN predictions above.
# No `positive` argument is passed, and the printed report labels "No" as the
# positive class, so Specificity is the rate at which attrition ("Yes") is
# detected.
confusionMatrix(table(classification, test$Attrition))
## Confusion Matrix and Statistics
##
##
## classification No Yes
## No 218 43
## Yes 0 0
##
## Accuracy : 0.8352
## 95% CI : (0.7846, 0.8781)
## No Information Rate : 0.8352
## P-Value [Acc > NIR] : 0.5406
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 1.504e-10
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.8352
## Neg Pred Value : NaN
## Prevalence : 0.8352
## Detection Rate : 0.8352
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : No
##
#source("analysis/nb.R")
# Naive Bayes input: attrition v. leadership tenure.
# ID and both tenure columns are converted to factors, so the classifier
# fitted on df_attr treats the two predictors as categorical.
df_attr <- df_cs2 %>%
  select(ID, Attrition, YearsWithCurrManager, YearsInCurrentRole) %>%
  mutate(
    ID = as.factor(ID),
    YearsInCurrentRole = as.factor(YearsInCurrentRole),
    YearsWithCurrManager = as.factor(YearsWithCurrManager)
  )

# Jittered scatter of the two predictors coloured by attrition.
# Line width is set with `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0 and only remains valid for the points.
df_attr %>%
  ggplot(aes(YearsWithCurrManager, YearsInCurrentRole)) +
  geom_point(aes(color = Attrition), position = "jitter", size = 0.3) +
  geom_smooth(aes(color = Attrition), linewidth = 0.3)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Estimate naive Bayes accuracy over repeated random 70/30 splits of df_attr.
# m_acc holds one accuracy per iteration; mean_acc is their average.
iterations <- 100
m_acc <- matrix(nrow = iterations)
split_percent <- 0.7
for (i in seq_len(iterations)) {
  train_indices <- sample(
    seq_len(nrow(df_attr)),
    round(split_percent * nrow(df_attr))
  )
  train <- df_attr[train_indices, ]
  test <- df_attr[-train_indices, ]
  # Columns 3:4 are YearsWithCurrManager and YearsInCurrentRole
  model <- naiveBayes(train[, 3:4], train$Attrition)
  # Predict once and reuse the result; the discarded table() call and the
  # duplicate predict() were removed
  predicted <- predict(model, test[, 3:4])
  cm <- confusionMatrix(table(predicted, test$Attrition))
  m_acc[i] <- cm$overall[["Accuracy"]]
}
mean_acc <- colMeans(m_acc)
mean_acc
## [1] 0.8068966
#source("analysis/lm.R")
# Linear model: Monthly Income regressed on Hourly Rate
# NOTE(review): the LOOCV section further down models
# MonthlyIncome ~ MonthlyRate instead - confirm which predictor is intended.
fit <- lm(MonthlyIncome ~ HourlyRate, data = df_pay)

# Scatter plot of the relationship the model estimates: predictor on x,
# response on y, so the lm smoother matches `fit`
df_pay %>%
  ggplot(aes(HourlyRate, MonthlyIncome)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# Coefficient estimates and standard errors taken from the fitted model
beta_0_hat <- fit$coefficients[1]
beta_1_hat <- fit$coefficients[2]
SE_beta_0_hat <- summary(fit)$coefficients[1, 2]
SE_beta_1_hat <- summary(fit)$coefficients[2, 2]

# Intercept: t statistic and two-sided p-value on n - 2 degrees of freedom.
# abs() with lower.tail = FALSE is correct for either sign of t and avoids
# the cancellation in 1 - pt(), which rounds tiny p-values to exactly 0.
tstat_int <- beta_0_hat / SE_beta_0_hat
pvalue_int <- 2 * pt(abs(tstat_int), df = nrow(df_pay) - 2, lower.tail = FALSE)
tstat_int
## (Intercept)
## 11.94203
# Print the p-value computed for the intercept
pvalue_int
## (Intercept)
## 0
# Slope: t statistic and two-sided p-value.
# Degrees of freedom are n - 2 with n = nrow(df_pay); length(df_pay) is the
# number of columns. The two-sided p-value is twice the upper-tail
# probability of |t|, which keeps it inside [0, 1].
tstat_slope <- beta_1_hat / SE_beta_1_hat
pvalue_slope <- 2 * pt(
  abs(tstat_slope),
  df = nrow(df_pay) - 2,
  lower.tail = FALSE
)
tstat_slope
## HourlyRate
## 0.0704479
# Print the p-value computed for the slope
pvalue_slope
## HourlyRate
## 1.054192
# Full regression summary (coefficients, residual standard error, R-squared,
# F-statistic) to cross-check the hand-computed t statistics and p-values
summary(fit)
##
## Call:
## lm(formula = MonthlyIncome ~ HourlyRate, data = df_pay)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5322 -3539 -1444 1779 13609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6354.4250 532.1060 11.94 <2e-16 ***
## HourlyRate 0.5462 7.7535 0.07 0.944
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4600 on 868 degrees of freedom
## Multiple R-squared: 5.718e-06, Adjusted R-squared: -0.001146
## F-statistic: 0.004963 on 1 and 868 DF, p-value: 0.9439
# Confidence intervals for the intercept and slope; the printed bounds are the
# 2.5 % and 97.5 % limits
confint(fit)
## 2.5 % 97.5 %
## (Intercept) 5310.06024 7398.78985
## HourlyRate -14.67154 15.76397
# Welch Two Sample t-test
# Compares the mean of MonthlyIncome with the mean of MonthlyRate, treating the
# two columns as separate samples; the printed alternative is two-sided.
t.test(df_pay$MonthlyIncome, df_pay$MonthlyRate)
##
## Welch Two Sample t-test
##
## data: df_pay$MonthlyIncome and df_pay$MonthlyRate
## t = -27.648, df = 1487.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8498.351 -7372.362
## sample estimates:
## mean of x mean of y
## 6390.264 14325.621
# Conduct Hypothesis Test
# LOOCV
# Leave-one-out cross-validation of MonthlyIncome ~ MonthlyRate: refit the
# model without row i, predict row i, and accumulate the squared error.
pred_error_sq <- 0
for (i in seq_len(nrow(df_pay))) {
  loocv_i <- df_pay[-i, ]
  fit <- lm(MonthlyIncome ~ MonthlyRate, data = loocv_i)
  pred_i <- unname(
    predict(fit, data.frame(MonthlyRate = df_pay$MonthlyRate[i]))
  )
  # The error is measured against the held-out response (MonthlyIncome),
  # not against the predictor value that was fed to predict()
  pred_error_sq <- pred_error_sq + (df_pay$MonthlyIncome[i] - pred_i)^2
}
# Sum of squared leave-one-out prediction errors
SSE <- pred_error_sq
# Total sum of squares of the response around its mean
SST <- sum((df_pay$MonthlyIncome - mean(df_pay$MonthlyIncome))^2)
R_squared <- 1 - (SSE / SST)
# Average over the number of observations; length(df_pay) would be the
# number of columns
MSE <- SSE / nrow(df_pay)
RMSE <- sqrt(MSE)
RMSE
## 1
## 102794.5